{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "cde8a9dc",
   "metadata": {},
   "source": [
    "# LangWatch Evaluation Tracking\n",
    "\n",
    "## Simple Evaluation Loop"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "e4b3c5de",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "LangWatch API key is already set, if you want to login again, please call as langwatch.login(relogin=True)\n"
     ]
    }
   ],
   "source": [
    "import langwatch\n",
    "\n",
    "# Log in to LangWatch. Per the saved output, when an API key is already set this only reports it;\n",
    "# pass relogin=True to log in again.\n",
    "langwatch.login()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "6f664279",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "2025-07-01 10:54:04,973 - langwatch.utils.initialization - INFO - Setting up LangWatch client...\n",
      "2025-07-01 10:54:04,981 - langwatch.client - INFO - Configuring OTLP exporter with endpoint: https://app.langwatch.ai/api/otel/v1/traces\n",
      "2025-07-01 10:54:04,981 - langwatch.client - INFO - Registering atexit handler to flush tracer provider on exit\n",
      "2025-07-01 10:54:04,982 - langwatch.client - INFO - Successfully configured tracer provider with OTLP exporter\n",
      "2025-07-01 10:54:04,982 - langwatch.utils.initialization - INFO - LangWatch client setup complete\n",
      "Follow the results at: https://app.langwatch.ai/demo/experiments/my-incredible-experiment?runId=neon-crane-of-philosophy\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "0ab90f22fb2d48d0ac235cbe2ea88526",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Evaluating:   0%|          | 0/3 [00:00<?, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "import random\n",
    "import pandas as pd\n",
    "import time\n",
    "\n",
    "# Small question/answer dataset used as evaluation input; each tuple is (question, answer).\n",
    "# The final entry's question is a markdown image link rather than plain text.\n",
    "df = pd.DataFrame(\n",
    "    [\n",
    "        (\n",
    "            \"What is LangWatch?\",\n",
    "            \"LangWatch is a platform for evaluating and improving language models.\",\n",
    "        ),\n",
    "        (\n",
    "            \"How do I use LangWatch?\",\n",
    "            \"You can use LangWatch by installing the LangWatch SDK and then calling the LangWatch API.\",\n",
    "        ),\n",
    "        (\n",
    "            \"Does LangWatch support multiple language models?\",\n",
    "            \"Yes, LangWatch is compatible with all language models by using LiteLLM under the hood.\",\n",
    "        ),\n",
    "        (\n",
    "            \"Can I visualize evaluation metrics in LangWatch?\",\n",
    "            \"Yes, LangWatch provides dashboards for visualizing key evaluation metrics.\",\n",
    "        ),\n",
    "        (\n",
    "            \"Is there a free tier for LangWatch?\",\n",
    "            \"LangWatch offers a free tier with limited usage, ideal for small projects and evaluation.\",\n",
    "        ),\n",
    "        (\n",
    "            \"Where can I find documentation for LangWatch?\",\n",
    "            \"You can find the official documentation on the LangWatch website or GitHub repository.\",\n",
    "        ),\n",
    "        (\n",
    "            \"![](https://i.imgur.com/Tb5hyby.jpeg)\",\n",
    "            \"This is a screenshot of LangWatch website\",\n",
    "        ),\n",
    "    ],\n",
    "    columns=[\"question\", \"answer\"],\n",
    ")\n",
    "\n",
    "# Create the evaluation handle for this experiment; the loop and log calls below go through it.\n",
    "evaluation = langwatch.evaluation.init(\"my-incredible-experiment\")\n",
    "\n",
    "\n",
    "@langwatch.trace()\n",
    "def agent(question):\n",
    "    \"\"\"Placeholder agent: ignore `question`, pause 0-10 whole seconds, return a canned reply.\"\"\"\n",
    "    pause_seconds = random.randint(0, 10)\n",
    "    time.sleep(pause_seconds)\n",
    "    canned_reply = {\"text\": \"foo bar\"}\n",
    "    return canned_reply\n",
    "\n",
    "\n",
    "# Sequential run: evaluation.loop drives the iteration over the dataset rows.\n",
    "for index, row in evaluation.loop(df.iterrows()):\n",
    "    result = agent(row[\"question\"])  # your code\n",
    "\n",
    "    # Demo metric only: a random score between 0.2 and 1.0, passing above 0.5.\n",
    "    score = random.randint(0, 80) / 100 + 0.2\n",
    "    evaluation.log(\n",
    "        \"sample_metric\",\n",
    "        index=index,\n",
    "        # Record the agent's reply with the score so the logged entry shows what was evaluated.\n",
    "        data={\"response\": result[\"text\"]},\n",
    "        score=score,\n",
    "        passed=score > 0.5,\n",
    "    )\n"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "0bf2e191",
   "metadata": {},
   "source": [
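    "## Parallel Evaluation Loop\n",
    "\n",
    "This section reuses the `df` dataset built in the Simple Evaluation Loop section, so run that cell first."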
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "8fd7b230",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "2025-06-04 23:08:39,668 - langwatch.client - INFO - Registering atexit handler to flush tracer provider on exit\n",
      "2025-06-04 23:08:39,668 - langwatch.client - WARNING - An existing global trace provider was found. LangWatch will not override it automatically, but instead is attaching another span processor and exporter to it. You can disable this warning by setting `ignore_global_tracer_provider_override_warning` to `True`.\n",
      "Follow the results at: http://localhost:5560/inbox-narrator/experiments/my-incredible-experiment?runId=ruddy-numbat-of-domination\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "513440119fc24b7b8ffb4257656ec2a8",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Evaluating:   0%|          | 0/6 [00:00<?, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "import random\n",
    "import time\n",
    "\n",
    "# NOTE(review): the saved output warns that a global trace provider already existed when this ran;\n",
    "# confirm this explicit setup() call is still needed here.\n",
    "langwatch.setup()\n",
    "# Fresh evaluation handle for the parallel run, under the same experiment name as the simple loop.\n",
    "evaluation = langwatch.evaluation.init(\"my-incredible-experiment\")\n",
    "\n",
    "@langwatch.trace()\n",
    "def agent(question):\n",
    "    \"\"\"Placeholder agent for the parallel demo: ignore `question`, pause 0-10 whole seconds, return a fixed string.\"\"\"\n",
    "    pause_seconds = random.randint(0, 10)\n",
    "    time.sleep(pause_seconds)\n",
    "    return \"foo parallel\"\n",
    "\n",
    "# Parallel run: the loop is opened with threads=4 and each row's work is handed to evaluation.submit.\n",
    "for index, row in evaluation.loop(df.iterrows(), threads=4):\n",
    "    def evaluate(index, row):\n",
    "        \"\"\"Run the agent on one row's question and log its response with a fixed score of 1.\"\"\"\n",
    "        response = agent(row[\"question\"])\n",
    "        evaluation.log(\n",
    "            \"sample_metric\",\n",
    "            index=index,\n",
    "            data={\"response\": response},\n",
    "            score=1,\n",
    "        )\n",
    "\n",
    "    evaluation.submit(evaluate, index, row)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "c9fa8b21",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": ".venv",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.6"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
